library("dplyr")
library("Seurat")
library("knitr")
library("ggplot2")
library("BiocManager")
library("here")
#BiocManager::install("EnhancedVolcano")
library("EnhancedVolcano") #volcano plot
#install.packages('DESeq2') #for DEG
library("DESeq2")
library("tidyverse") #tidy up data
# Helper packages: install on first use, then attach.
# requireNamespace() only tests whether the package is available; library()
# then stops with an error if it is still missing. require() would instead
# return FALSE and let the script fail later, at the first call into the
# package. Attach order is kept the same as before.
if (!requireNamespace("kableExtra", quietly = TRUE)) {
  install.packages("kableExtra")
}
library("kableExtra") # for table formatting

if (!requireNamespace("RColorBrewer", quietly = TRUE)) {
  install.packages("RColorBrewer")
}
library("RColorBrewer") # for color brewer

if (!requireNamespace("sctransform", quietly = TRUE)) {
  install.packages("sctransform")
}
library("sctransform") # for data normalization

# glmGamPoi is distributed through Bioconductor, hence BiocManager.
if (!requireNamespace("glmGamPoi", quietly = TRUE)) {
  BiocManager::install("glmGamPoi")
}
library("glmGamPoi") # for data normalization, sctransform

if (!requireNamespace("cowplot", quietly = TRUE)) {
  install.packages("cowplot")
}
library("cowplot") # for figure layout

if (!requireNamespace("patchwork", quietly = TRUE)) {
  install.packages("patchwork")
}
library("patchwork") # for figure patching

if (!requireNamespace("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx")
}
library("openxlsx") # to save .xlsx files
set.seed(12345)
# here()
Welcome to the Single-Cell Omics Research and Education Club!
If this is your first time at the club, I want to extend an extra-special welcome to you!
I’m Jonathan Nelson, an Assistant Professor at the University of Southern California. I’m a wet scientist turned wet+dry scientist. I’ve been working with single-cell RNAseq data for the past 5 years and I’m excited to share what I’ve learned with you.
We believe that bioinformatics is a constantly evolving field, and that ongoing learning and professional development are essential to staying up-to-date. We encourage members to share their knowledge and experiences with each other, and to seek out opportunities for continued learning.
We believe that access to bioinformatics support should be available to everyone. We strive to create a welcoming and inclusive environment where all members can feel comfortable asking for help and contributing to the group.
We believe that working together is key to achieving success in bioinformatics. We value the diversity of perspectives and backgrounds that each member brings, and we encourage open communication and the sharing of ideas.
We believe in conducting ourselves with honesty and professionalism in all our interactions. We hold ourselves to high ethical standards and respect the privacy and confidentiality of all members.
We believe in approaching each other with empathy and kindness. We understand that bioinformatics can be a challenging and sometimes frustrating field, and we strive to support each other through these difficulties.
My expectation is that we have about 6 of these meetings (we’re on 2 of 6) together and then we can re-evaluate if we want to continue as a group or not.
Email me if you would like me to add anyone: j.nelson@med.usc.edu
Today’s code (this html file) will be posted to the SCORE website (https://usckrc.github.io/website/score.html)
Cigarettes after Sex: Tejano Blue
https://open.spotify.com/track/383EQ8PDAlqzSe4ayyn2Ct?si=bf7a709fe95146e8
Great Article on Type Piping -> https://uc-r.github.io/pipe
Reference to magrittr package -> https://magrittr.tidyverse.org/
Piping is a great way to clean up your code and make it more readable!
Practically: I usually write code as multiple objects and then convert to piping.
UpSetR
ComplexHeatmap
https://jokergoo.github.io/ComplexHeatmap-reference/book/upset-plot.html
FindAllMarkers -> Upset Plot
# Load the saved object from Data/All_PT.rds (path resolved with here()).
SO <- readRDS(here("Data", "All_PT.rds"))

# Fix the subclass order BEFORE deriving SO2. Previously SO2 was created and
# given its identities from the un-releveled column, so only SO used the
# PTS1 -> PTS2 -> PTS3 -> PTinj order and plots drawn from SO2 could list the
# groups in a different order than the marker table computed from SO.
# Note: values of subclass.All that are not in `levels` become NA.
SO@meta.data$subclass.All <- factor(
  SO@meta.data$subclass.All,
  levels = c("PTS1", "PTS2", "PTS3", "PTinj")
)
Idents(SO) <- SO@meta.data$subclass.All

# SO2 is a re-processed copy of SO: normalize, pick variable features, scale,
# PCA, then neighbor graph and UMAP on the first 30 dimensions. Each step
# receives the object as its first argument, so the chain is equivalent to the
# step-by-step `SO2 <- f(object = SO2)` form.
SO2 <- SO %>%
  NormalizeData() %>%
  FindVariableFeatures() %>%
  ScaleData() %>%
  RunPCA() %>%
  FindNeighbors(dims = 1:30) %>%
  RunUMAP(dims = 1:30)

# Use the (now releveled) subclass annotation as the active identity of SO2.
Idents(SO2) <- SO2@meta.data$subclass.All
# Dimensional-reduction plot of SO, grouped by the active identities, with
# relabeled axes, no axis text/ticks, and a centered title.
DimPlot(SO) +
  ylab("UMAP 2") +
  xlab("UMAP 1") +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated since ggplot2 3.4.0.
    axis.line = element_line(linewidth = 1, colour = "black"),
    axis.text = element_blank(), # Remove axis text
    axis.ticks = element_blank(), # Optional: Remove axis ticks
    text = element_text(size = 20)
  ) +
  ggtitle("Proximal Tubule Cell Types")

# Marker genes for every identity class of SO: positive markers only,
# with min.pct = 0.25 and logfc.threshold = 0.25.
# This statement was fused onto the end of the plot expression above, which
# made the whole chunk a parse error; it is now a statement of its own.
df <- FindAllMarkers(
  SO,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)
# Print the five markers with the largest avg_log2FC in each cluster.
df %>%
  group_by(cluster) %>%
  top_n(n = 5, wt = avg_log2FC)

# Same selection, but each gene is first restricted to its first occurrence in
# df, so no gene is listed under more than one cluster. The result is stored
# as top5. (The two statements were fused on one line, which did not parse.)
top5 <- df %>%
  distinct(gene, .keep_all = TRUE) %>%
  group_by(cluster) %>%
  top_n(5, avg_log2FC)
# Heatmap of the top-5 marker genes on SO2 (the object that went through
# ScaleData above), without a legend.
DoHeatmap(SO2, features = top5$gene) + NoLegend()

# Dot plot of the same genes on SO. The DoHeatmap() call above was fused with
# the start of this call, which did not parse; the two are now separate.
DotPlot(
  SO,
  features = top5$gene,
  cols = c("#0099ff", "#dc143c"),
  dot.scale = 8,
  dot.min = 0,
  scale.max = 100,
  scale.min = 0,
  col.min = -2.5,
  col.max = 2.5
) +
  # scale_y_discrete(limits = c(Prol"MD", "TAL β", "TAL α")) +
  theme(
    axis.text.x = element_text(),
    axis.title = element_blank(),
    plot.title = element_text(hjust = 0.5),
    legend.text = element_text(size = 9),
    legend.title = element_text(size = 9)
  ) +
  RotatedAxis() +
  ggtitle("Top 5 Proximal Tubule DEG")
## List of 4
## $ PTS1 : chr [1:189] "Slc5a12" "Nox4" "Spp1" "Gatm" ...
## $ PTS2 : chr [1:537] "Gm33906" "Smarca2" "Cyp4b1" "Cndp2" ...
## $ PTS3 : chr [1:612] "Cyp7b1" "Rnf24" "Fgf1" "Aadat" ...
## $ PTinj: chr [1:430] "Jun" "Lims1" "Rtn4" "Krt20" ...
## PTS1 PTS2 PTS3 PTinj
## 0610005C13Rik 0 1 1 0
## 0610040J01Rik 1 0 0 0
## 0610043K17Rik 0 1 1 0
## 1110019D14Rik 0 0 1 0
## 1600010M07Rik 0 0 0 1
## 1700012D14Rik 0 0 1 0
## A combination matrix with 4 sets and 11 combinations.
## ranges of combination set size: c(1, 389).
## mode for the combination size: distinct.
## sets are on rows.
##
## Top 8 combination sets are:
## PTS1 PTS2 PTS3 PTinj code size
## x 0001 389
## x 0010 293
## x x 0110 285
## x 0100 205
## x 1000 129
## x x 1100 45
## x x 0011 29
## x x 1001 11
##
## Sets are:
## set size
## PTS1 189
## PTS2 537
## PTS3 612
## PTinj 430
## [1] "0610040J01Rik" "4930402H24Rik" "Aak1" "Adra1a"
## [5] "Airn" "Alpl" "App" "Arhgef28"
## [9] "Arsb" "Atp1a1" "Atp6v0b" "Atp6v1b2"
## [13] "Atxn1" "Auts2" "Bcas3" "Bnc2"
## [17] "Car12" "Ccdc141" "Cd63" "Cdkal1"
## [21] "Cfap69" "Cgnl1" "Cltrn" "Col27a1"
## [25] "Comt" "Cox4i1" "Cox6c" "Cox7c"
## [29] "Dip2c" "Dleu2" "Dock9" "Dpyd"
## [33] "Eef1a1" "Epb41l3" "Errfi1" "Ext1"
## [37] "Fam13a" "Fgfr2" "Fhod3" "Folh1"
## [41] "Frmd4b" "Gatm" "Gkap1" "Glud1"
## [45] "Gm37245" "Gm42418" "Gpx3" "Id2"
## [49] "Idh1" "Igfbp7" "Igsf11" "Immp2l"
## [53] "Iqgap2" "Itch" "Itpr2" "Kank1"
## [57] "Kif12" "Lamp2" "Lgmn" "Lgr4"
## [61] "Lima1" "Maf" "Magi1" "Magi2"
## [65] "Mapk4" "Msi2" "Myo10" "Myo9a"
## [69] "Ndrg1" "Nectin3" "Neu1" "Nhs"
## [73] "Nox4" "P3h2" "Pam" "Pcsk5"
## [77] "Phldb2" "Plcl1" "Plekha1" "Ppargc1a"
## [81] "Ppm1h" "Ppp1r16b" "Ppp2r3a" "Ppp2r5e"
## [85] "Prkag2" "Prodh" "Prodh2" "Ptprd"
## [89] "Ptprj" "Ptprk" "Ralgapa2" "Rbpms"
## [93] "Retreg1" "Rps27" "Rsrp1" "S100g"
## [97] "Sh3rf1" "Shb" "Sik3" "Slc13a1"
## [101] "Slc16a10" "Slc16a12" "Slc18a1" "Slc22a23"
## [105] "Slc4a4" "Slc5a12" "Slc5a2" "Slc6a19"
## [109] "Slc7a7" "Slc7a8" "Smyd3" "Sntb2"
## [113] "Sorbs1" "Spink1" "Spp1" "Ssh2"
## [117] "Steap2" "Tcf12" "Tfcp2l1" "Timd2"
## [121] "Tmem106a" "Tmem108" "Trpm3" "Tsc22d1"
## [125] "Unc5c" "Vps13b" "Wdfy3" "Wdr72"
## [129] "Zfp608"
# Violin plot of 0610040J01Rik across the identities of SO, drawn without
# individual points (pt.size = 0), with no legend and an empty x-axis title.
VlnPlot(SO, features = c("0610040J01Rik"), pt.size = 0) +
  theme(
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated since ggplot2 3.4.0.
    axis.line = element_line(linewidth = 1, colour = "black"),
    text = element_text(size = 20),
    axis.text.x = element_text(
      color = "black", size = 16, angle = 45, hjust = 1, vjust = 1
    ),
    legend.position = "none"
  ) +
  xlab("")
## [1] "0610005C13Rik" "0610043K17Rik" "4430402I18Rik" "4930533I22Rik"
## [5] "Aacs" "Abhd14b" "AC149090.1" "Acad10"
## [9] "Acat1" "Acot12" "Acot13" "Acsm1"
## [13] "Acsm3" "Acss2" "Acy3" "Adh1"
## [17] "Adhfe1" "Adtrp" "Agbl4" "Agps"
## [21] "AI314278" "Ak4" "Akr1c21" "Aldh1l1"
## [25] "Aldh8a1" "Amacr" "Anapc15" "Ankib1"
## [29] "Ankrd13c" "Ar" "Arhgap18" "Arhgap32"
## [33] "Arhgap42" "Aspdh" "Atp11a" "Atrnl1"
## [37] "Atxn7l1" "B3gat2" "B430010I23Rik" "B4galt5"
## [41] "BC005561" "BC024386" "Bdh2" "Bphl"
## [45] "Calml4" "Ccdc107" "Ccdc6" "Ccs"
## [49] "Cd36" "Ces1f" "Chpt1" "Chuk"
## [53] "Cldn10" "Clmn" "Clpx" "Clybl"
## [57] "Cmah" "Col4a3" "Col4a4" "Cpped1"
## [61] "Cryzl2" "Csad" "Csgalnact1" "Csnk1g3"
## [65] "Ctdspl" "Ctnna2" "Cubn" "Cyp2j11"
## [69] "Cyp2j5" "Dalrd3" "Dcun1d3" "Ddi2"
## [73] "Ddx5" "Dglucy" "Dhtkd1" "Dis3l2"
## [77] "Dnajc12" "E130102H24Rik" "Echdc2" "Eci2"
## [81] "Eci3" "Ehhadh" "Eif4g3" "Enpp3"
## [85] "Ephx2" "Erc2" "Erlin1" "Etv6"
## [89] "Exoc4" "Fah" "Fam214a" "Fam83g"
## [93] "Fbp1" "Fmo5" "Fnip2" "Folr1"
## [97] "Fras1" "Galnt14" "Gbf1" "Gcdh"
## [101] "Gcnt1" "Gcnt2" "Ggt1" "Ghr"
## [105] "Gk" "Glb1" "Glt1d1" "Gm19950"
## [109] "Gm35281" "Gm39033" "Gm42397" "Gm43190"
## [113] "Gm45792" "Gm5089" "Gpc4" "Gpd2"
## [117] "Gpr108" "Gpr137b" "Greb1l" "Gss"
## [121] "Gstt2" "Gstz1" "Gucd1" "Hacd2"
## [125] "Herc4" "Herpud1" "Hgd" "Hibadh"
## [129] "Hmgcs1" "Hnf1aos1" "Hnf1b" "Hnf4a"
## [133] "Hnf4aos" "Hnf4g" "Hoga1" "Hoxb3os"
## [137] "Hpn" "Hsd3b3" "Hykk" "Iah1"
## [141] "Isoc2a" "Itgb8" "Itpr1" "Ivd"
## [145] "Jarid2" "Kansl1l" "Keg1" "Khk"
## [149] "Kyat1" "Lactb2" "Ldc1" "Ldhd"
## [153] "Lin52" "Lpl" "Lrmda" "Lrp2"
## [157] "Lrpap1" "Lrrk2" "Lypla1" "Macrod1"
## [161] "Macrod2" "Map2k6" "Mapk14" "Mat2a"
## [165] "Me1" "Med14" "Mep1a" "Mertk"
## [169] "Mettl26" "Mettl7a1" "Mfsd4b5" "Mlxipl"
## [173] "Mpc2" "Mtor" "Mtus1" "Myo6"
## [177] "Nat8f6" "Ndel1" "Nfia" "Nfib"
## [181] "Nfyc" "Nipsnap1" "Nit1" "Nt5c2"
## [185] "Ntn1" "Nudt19" "Osbpl3" "Osbpl8"
## [189] "Osbpl9" "Pank1" "Parp8" "Patj"
## [193] "Pcyt2" "Pde4c" "Pde4d" "Pdzd2"
## [197] "Pdzk1" "Pdzrn3" "Pecr" "Pim3"
## [201] "Pipox" "Pkd2" "Pkp4" "Plxdc2"
## [205] "Polr3e" "Por" "Ppm1k" "Prkn"
## [209] "Proc" "Ptpn3" "Pxmp4" "Rab3ip"
## [213] "Rbm47" "Rbpms2" "Rdh16" "Resf1"
## [217] "Rffl" "Rida" "Rin2" "Ror1"
## [221] "Rorc" "Scp2" "Sec11a" "Setbp1"
## [225] "Sfxn5" "Sil1" "Sirt3" "Sirt7"
## [229] "Slc16a9" "Slc17a1" "Slc17a3" "Slc1a1"
## [233] "Slc22a12" "Slc22a14" "Slc22a18" "Slc22a28"
## [237] "Slc22a30" "Slc25a21" "Slc25a51" "Slc27a2"
## [241] "Slc35a3" "Slc3a1" "Slc47a1" "Slc5a10"
## [245] "Slc5a8" "Slc6a20b" "Slc7a13" "Slco1a6"
## [249] "Slco3a1" "Smarca2" "Srr" "St8sia1"
## [253] "Stx16" "Sufu" "Sugct" "Sycp3"
## [257] "Tasor" "Tef" "Them7" "Thnsl2"
## [261] "Thrb" "Tmem116" "Tmem189" "Tns1"
## [265] "Tpk1" "Tpmt" "Trappc9" "Trim7"
## [269] "Trps1" "Ugt3a2" "Ugt8a" "Upb1"
## [273] "Usp2" "Vdr" "Veph1" "Vps13a"
## [277] "Vti1a" "Vwa8" "Wwc2" "Wwp1"
## [281] "Xylb" "Zbtb20" "Zfp444" "Zfp810"
## [285] "Zhx3"
## R version 4.3.1 (2023-06-16 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: America/Los_Angeles
## tzcode source: internal
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ComplexHeatmap_2.16.0 UpSetR_1.4.0
## [3] openxlsx_4.2.5.2 patchwork_1.2.0
## [5] cowplot_1.1.3 glmGamPoi_1.12.2
## [7] sctransform_0.4.1 RColorBrewer_1.1-3
## [9] kableExtra_1.3.4 lubridate_1.9.2
## [11] forcats_1.0.0 stringr_1.5.1
## [13] purrr_1.0.2 readr_2.1.5
## [15] tidyr_1.3.1 tibble_3.2.1
## [17] tidyverse_2.0.0 DESeq2_1.40.2
## [19] SummarizedExperiment_1.30.2 Biobase_2.60.0
## [21] MatrixGenerics_1.12.3 matrixStats_1.2.0
## [23] GenomicRanges_1.52.1 GenomeInfoDb_1.36.4
## [25] IRanges_2.34.1 S4Vectors_0.38.2
## [27] BiocGenerics_0.46.0 EnhancedVolcano_1.18.0
## [29] ggrepel_0.9.5 here_1.0.1
## [31] BiocManager_1.30.22 ggplot2_3.5.1
## [33] knitr_1.45 SeuratObject_5.0.1
## [35] Seurat_4.4.0 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.3.1 later_1.3.1
## [4] bitops_1.0-7 polyclip_1.10-6 lifecycle_1.0.4
## [7] doParallel_1.0.17 rprojroot_2.0.4 globals_0.16.2
## [10] lattice_0.21-8 MASS_7.3-60 magrittr_2.0.3
## [13] limma_3.56.2 plotly_4.10.4 sass_0.4.9
## [16] rmarkdown_2.25 jquerylib_0.1.4 yaml_2.3.7
## [19] httpuv_1.6.11 zip_2.3.0 spam_2.10-0
## [22] sp_2.1-3 spatstat.sparse_3.0-3 reticulate_1.34.0
## [25] pbapply_1.7-2 abind_1.4-5 zlibbioc_1.46.0
## [28] rvest_1.0.3 Rtsne_0.17 RCurl_1.98-1.12
## [31] circlize_0.4.15 GenomeInfoDbData_1.2.10 irlba_2.3.5.1
## [34] listenv_0.9.1 spatstat.utils_3.0-4 goftest_1.2-3
## [37] spatstat.random_3.2-2 fitdistrplus_1.1-11 parallelly_1.36.0
## [40] svglite_2.1.1 leiden_0.4.3.1 codetools_0.2-19
## [43] DelayedArray_0.26.7 xml2_1.3.6 shape_1.4.6
## [46] tidyselect_1.2.1 farver_2.1.1 spatstat.explore_3.2-5
## [49] webshot_0.5.5 jsonlite_1.8.8 GetoptLong_1.0.5
## [52] ellipsis_0.3.2 progressr_0.14.0 iterators_1.0.14
## [55] ggridges_0.5.6 survival_3.5-5 systemfonts_1.0.5
## [58] foreach_1.5.2 tools_4.3.1 ica_1.0-3
## [61] Rcpp_1.0.12 glue_1.8.0 gridExtra_2.3
## [64] xfun_0.40 withr_3.0.2 fastmap_1.1.1
## [67] fansi_1.0.4 digest_0.6.33 timechange_0.2.0
## [70] R6_2.5.1 mime_0.12 colorspace_2.1-0
## [73] Cairo_1.6-1 scattermore_1.2 tensor_1.5
## [76] spatstat.data_3.0-4 utf8_1.2.3 generics_0.1.3
## [79] data.table_1.14.10 httr_1.4.7 htmlwidgets_1.6.4
## [82] S4Arrays_1.2.0 uwot_0.1.16 pkgconfig_2.0.3
## [85] gtable_0.3.6 lmtest_0.9-40 XVector_0.40.0
## [88] htmltools_0.5.8.1 dotCall64_1.1-1 clue_0.3-65
## [91] scales_1.3.0 png_0.1-8 rstudioapi_0.15.0
## [94] rjson_0.2.21 tzdb_0.4.0 reshape2_1.4.4
## [97] nlme_3.1-162 GlobalOptions_0.1.2 cachem_1.0.8
## [100] zoo_1.8-12 KernSmooth_2.23-21 vipor_0.4.5
## [103] parallel_4.3.1 miniUI_0.1.1.1 ggrastr_1.0.2
## [106] pillar_1.9.0 vctrs_0.6.5 RANN_2.6.1
## [109] promises_1.2.1 xtable_1.8-4 cluster_2.1.4
## [112] beeswarm_0.4.0 evaluate_0.23 magick_2.8.0
## [115] cli_3.6.1 locfit_1.5-9.8 compiler_4.3.1
## [118] rlang_1.1.5 crayon_1.5.2 future.apply_1.11.1
## [121] labeling_0.4.3 ggbeeswarm_0.7.2 plyr_1.8.9
## [124] stringi_1.7.12 viridisLite_0.4.2 deldir_2.0-2
## [127] BiocParallel_1.34.2 munsell_0.5.1 lazyeval_0.2.2
## [130] spatstat.geom_3.2-8 Matrix_1.6-5 hms_1.1.3
## [133] future_1.33.1 shiny_1.8.0 highr_0.10
## [136] ROCR_1.0-11 igraph_1.6.0 bslib_0.8.0